{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a47dd557",
   "metadata": {},
   "source": [
    "### Misexpressed gene enrichment analysis "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "9b504a0e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from statsmodels.discrete import discrete_model\n",
    "from statsmodels.stats import multitest\n",
    "import numpy as np\n",
    "from patsy import dmatrices\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "131d03ad",
   "metadata": {},
   "outputs": [],
   "source": [
    "# inputs \n",
    "wkdir = \"/lustre/scratch126/humgen/projects/interval_rna/interval_rna_seq/thomasVDS/misexpression_v3\"\n",
    "wkdir_path = Path(wkdir)\n",
    "\n",
    "gene_features_path = wkdir_path.joinpath(\"3_misexp_genes/inactive_gene_features_8650.csv\")\n",
    "out_dir_path = wkdir_path.joinpath(\"3_misexp_genes/results\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "72d0de1d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of features: 82\n",
      "Number of genes: 8650\n"
     ]
    }
   ],
   "source": [
    "# load features \n",
    "gene_features_df = pd.read_csv(gene_features_path)\n",
    "features = [col for col in gene_features_df.columns if col not in [\"gene_id\", \"gene_type\"]]\n",
    "print(f\"Number of features: {len(features)}\")\n",
    "gene_ids_with_features = gene_features_df.gene_id.unique()\n",
    "print(f\"Number of genes: {len(gene_ids_with_features)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "ba2c0822",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 0.657092\n",
      "         Iterations 7\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692673\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692559\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.680872\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688081\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688669\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688824\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688182\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.680269\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687728\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688371\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687301\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689166\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687713\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.685497\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687344\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688178\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688615\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689769\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688531\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688704\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689022\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687819\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.684635\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687531\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.690707\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688283\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.690377\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687727\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691906\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689168\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689559\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689073\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692770\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691766\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691843\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691762\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692797\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691787\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689695\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692782\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.690887\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692756\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692771\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692806\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692448\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692692\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692702\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692712\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692502\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689394\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.690384\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691221\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691705\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692188\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689631\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691928\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692293\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.635786\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.641962\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.645741\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.633032\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.633960\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.638917\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.642317\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.629663\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.631658\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.637494\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.637755\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.640264\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.636210\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.639348\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.640032\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.616032\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.636704\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692753\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692717\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691277\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687620\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688229\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.667521\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.661434\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.657092\n",
      "         Iterations 7\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692673\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692559\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.680872\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688081\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688669\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688824\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688182\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.680269\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687728\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688371\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687301\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689166\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687713\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.685497\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687344\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688178\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688615\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689769\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688531\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688704\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689022\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687819\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.684635\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687531\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.690707\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688283\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.690377\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687727\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691906\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689168\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689559\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689073\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692770\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691766\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691843\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691762\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692797\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691787\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689695\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692782\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.690887\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692756\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692771\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692806\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692448\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692692\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692702\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692712\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692502\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689394\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.690384\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691221\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691705\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692188\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689631\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691928\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692293\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.635786\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.641962\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.645741\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.633032\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.633960\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.638917\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.642317\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.629663\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.631658\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.637494\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.637755\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.640264\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.636210\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.639348\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.640032\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.616032\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.636704\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692753\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692717\n",
      "         Iterations 3\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691277\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687620\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688229\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.667521\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.661434\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.658036\n",
      "         Iterations 7\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687650\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.686950\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.672124\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.681694\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.681877\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.682159\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.682215\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.671405\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.680103\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.681044\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.680118\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.682584\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.680102\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.679547\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.680769\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.682480\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.681227\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.683524\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.682082\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.681829\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.681561\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.681869\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.676299\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.681132\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.684976\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.682191\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.684045\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.681259\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.686877\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.681694\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.681949\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.682894\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687638\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688021\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687940\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688016\n",
      "         Iterations 4\n",
      "Warning: Maximum number of iterations has been exceeded.\n",
      "         Current function value: 0.687966\n",
      "         Iterations: 35\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688093\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687585\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688096\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687505\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687910\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688054\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688094\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687823\n",
      "         Iterations 4\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/nfs/users/nfs_t/tv5/.conda/envs/tv5_base/lib/python3.7/site-packages/statsmodels/base/model.py:606: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  ConvergenceWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688022\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687935\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688063\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688102\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687550\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687854\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.686928\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687253\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687427\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.684516\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687519\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687143\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.591405\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597667\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.601382\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.589575\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.591851\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.595756\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.598572\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.589454\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.589124\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.593470\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.591458\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.593508\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.589366\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.592209\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.593138\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.566549\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.589609\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687569\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688045\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.685913\n",
      "         Iterations 4\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.683130\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.683558\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.662546\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.657003\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.595152\n",
      "         Iterations 7\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.610718\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.609934\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.598337\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.606791\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.607796\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.606461\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.606789\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597630\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.605656\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.605098\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.604401\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.606999\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.604943\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.605664\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.606629\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.607855\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.605310\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.608886\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.606944\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.604242\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.606668\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.606977\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.602323\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.606042\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.609617\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.606527\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.609020\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.606565\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.611447\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.605883\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.604898\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.607460\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.608649\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.610028\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.612009\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.611861\n",
      "         Iterations 6\n",
      "Warning: Maximum number of iterations has been exceeded.\n",
      "         Current function value: 0.612243\n",
      "         Iterations: 35\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.611441\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.610745\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.611364\n",
      "         Iterations 12\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.612175\n",
      "         Iterations 5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/nfs/users/nfs_t/tv5/.conda/envs/tv5_base/lib/python3.7/site-packages/statsmodels/base/model.py:606: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  ConvergenceWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 0.611925\n",
      "         Iterations 9\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.612275\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.612324\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.612324\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.612290\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.612252\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.612183\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.611861\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.611708\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.611323\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.611295\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.611942\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.612029\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.610409\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.612143\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.611945\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.477856\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.481443\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.483109\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.478164\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.478867\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.479920\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.481878\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.478419\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.477666\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.477163\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.471362\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.473885\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.473797\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.475496\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.475677\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.459509\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.473975\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.611312\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.611522\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.610986\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.610306\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.610194\n",
      "         Iterations 6\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597565\n",
      "         Iterations 5\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.593866\n",
      "         Iterations 5\n"
     ]
    }
   ],
   "source": [
    "enrich_logr_results, count = {}, 0\n",
    "for z_score in [2, 10, 20, 30]:\n",
    "    gene_misexp_gene_path = wkdir_path.joinpath(f\"2_misexp_qc/misexp_metrics/misexp_genes_tpm0.5_z{z_score}.txt\")\n",
    "    gene_misexp = set(pd.read_csv(gene_misexp_gene_path, sep=\"\\t\", header=None)[0].tolist())\n",
    "    gene_never_misexp_path = wkdir_path.joinpath(f\"2_misexp_qc/misexp_metrics/never_misexp_genes_tpm0.5_z{z_score}.txt\")\n",
    "    gene_never_misexp = set(pd.read_csv(gene_never_misexp_path, sep=\"\\t\", header=None)[0].tolist())\n",
    "    total_genes = len(gene_misexp) + len(gene_never_misexp)\n",
    "    if total_genes != len(gene_ids_with_features): \n",
    "        raise ValueError(\"Mismatch between gene total genes and numeber of genes with features.\")\n",
    "    # categorical variable: misexpressed, never misexpressed \n",
    "    gene_features_df[\"misexp_group\"] = np.where(gene_features_df.gene_id.isin(gene_never_misexp), 0, 1)\n",
    "    # logistic regression \n",
    "    for feature in features:\n",
    "        input_df = gene_features_df.copy()\n",
    "        input_df[f\"{feature}_norm\"] = (input_df[feature] - input_df[feature].mean())/input_df[feature].std()\n",
    "        y, X = dmatrices(f\"misexp_group ~ {feature}_norm\", input_df, return_type = 'dataframe')\n",
    "        logit_fit = discrete_model.Logit(endog=y, exog=X).fit()\n",
    "        log_odds, pval = logit_fit.params[1], logit_fit.pvalues[1]\n",
    "        # normal approximation confidence intervals\n",
    "        lower_conf = logit_fit.conf_int(alpha=0.05)[0][1]\n",
    "        upper_conf = logit_fit.conf_int(alpha=0.05)[1][1]\n",
    "        enrich_logr_results[count] = [z_score, feature, log_odds, lower_conf, upper_conf, pval]\n",
    "        count += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "5ec8a675",
   "metadata": {},
   "outputs": [],
   "source": [
    "# results \n",
    "enrich_logr_results_df = pd.DataFrame.from_dict(enrich_logr_results, \n",
    "                                                orient=\"index\", \n",
    "                                                columns=[\"z_score\", \"feature\", \"log_odds\", \"lower\", \"upper\", \"pval\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "cd582334",
   "metadata": {},
   "outputs": [],
   "source": [
    "# multiple testing correction \n",
    "pval_as_array = enrich_logr_results_df.pval.to_numpy()\n",
    "# FDR BH and Bonferroni correction \n",
    "for method in [\"fdr_bh\", \"bonferroni\"]:\n",
    "    pass_test, pval_adj, _, _ = multitest.multipletests(pval_as_array, alpha=0.05, method=method)\n",
    "    enrich_logr_results_df[f\"{method}_pass\"] = pass_test\n",
    "    enrich_logr_results_df[f\"{method}_pval_adj\"] = pval_adj\n",
    "    enrich_logr_results_pass_df = enrich_logr_results_df[enrich_logr_results_df[f\"{method}_pass\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "bf63b3ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "# feature names\n",
    "feature_name_dict = {\"gene_length\": \"Gene length\", \n",
    "                     \"gene_count_1Mb\" : \"Genes within 1Mb\", \n",
    "                     \"gene_distance_min\": \"Min. gene distance\", \n",
    "                     \"pHaplo\": \"pHaplo\", \n",
    "                     \"pTriplo\": \"pTriplo\", \n",
    "                     \"EDS\": \"Enhancer Domain Score (EDS)\", \n",
    "                     'oe_lof_upper': \"gnomAD LOEUF\", \n",
    "                     'oe_mis_upper': \"gnomAD missense OEUF\", \n",
    "                     'Episcore': 'Episcore', \n",
    "                     'gene_fraction_TssA': 'ChromHMM Active TSS', \n",
    "                     'gene_fraction_TssAFlnk': 'ChromHMM Flanking active TSS', \n",
    "                     'gene_fraction_TxFlnk': \"ChromHMM Transcr. at gene 5' and 3'\",\n",
    "                     'gene_fraction_Tx': \"ChromHMM Strong transcription\", \n",
    "                     'gene_fraction_TxWk': \"ChromHMM Weak transcription\",\n",
    "                     'gene_fraction_EnhG': \"ChromHMM Genic enhancers\", \n",
    "                     'gene_fraction_Enh': \"ChromHMM Enhancers\", \n",
    "                     'gene_fraction_ZNFRpts': \"ChromHMM ZNF genes and repeats\",\n",
    "                     'gene_fraction_Het': \"ChromHMM Heterochromatin\", \n",
    "                     'gene_fraction_TssBiv': \"ChromHMM Bivalent/Poised TSS\", \n",
    "                     'gene_fraction_BivFlnk': \"ChromHMM Flanking Bivalent TSS/Enh\",\n",
    "                     'gene_fraction_EnhBiv': \"ChromHMM Bivalent Enhancer\", \n",
    "                     'gene_fraction_ReprPC': \"ChromHMM Repressed PolyComb\", \n",
    "                     'gene_fraction_ReprPCWk': \"ChromHMM Weak Repressed PolyComb\",\n",
    "                     'gene_fraction_Quies': \"ChromHMM Quiescent/Low\",\n",
    "                     'ActivityLinking_Conserved_nt_count': 'Conserved enhancer bp (activity-linking)', \n",
    "                     'ActivityLinking_nt_count': 'Enhancer nt bp (activity-linking)',\n",
    "                     'ProximityLinking_Conserved_nt_count': 'Conserved enhancer bp (proximity-linking)', \n",
    "                     'ProximityLinking_nt_count': 'Enhancer bp (proximity-linking)',\n",
    "                     'ActivityLinking_EnhancerNumber': \"# enhancers (activity-linking)\",\n",
    "                     'ActivityLinking_NumberConservedElements': \"Conserved # enhancers (activity-linking)\",\n",
    "                     'ProximityLinking_EnhancerNumber': \"# enhancers (proximity-linking)\",\n",
    "                     'ProximityLinking_NumberConservedElements': \"Conserved # enhancers (proximity-linking)\",\n",
    "                     'brain_expression': \"Brain expression\", \n",
    "                     'active_gene_count_1Mb': \"Expressed gene within 1Mb\",\n",
    "                     'gnomad_autosomal_dominant': 'Autosomal dominant gene',\n",
    "                     'gnomad_haploinsufficient': 'Haploinsufficient gene',\n",
    "                     'gnomad_autosomal_recessive': 'Autosomal recessive gene',\n",
    "                     'gnomad_olfactory_genes': \"Olfactory gene\",\n",
    "                     'oncogene': 'High-confidence oncogenes',\n",
    "                     \"approved_target\": \"Approved drug target\",\n",
    "                     \"gwas_gene\": \"GWAS gene\",\n",
    "                     \"decipher_gene\": \"DECIPHER gene\", \n",
    "                     'adipose_expression': 'Adipose expression', \n",
    "                     'adrenal_gland_expression': 'Adrenal gland expression', \n",
    "                     'artery_expression': 'Artery', \n",
    "                     'bladder_expression': 'Bladder', \n",
    "                     'brain_expression': 'Brain', \n",
    "                     'breast_expression': 'Breast', \n",
    "                     'cervix_expression': 'Cervix', \n",
    "                     'colon_expression': 'Colon', \n",
    "                     'esophagus_expression': 'Esophagus', \n",
    "                     'fallopian_tube_expression': 'Fallopian tube', \n",
    "                     'heart_expression': 'Heart', \n",
    "                     'kidney_expression': 'Kidney', \n",
    "                     'liver_expression': 'Liver', \n",
    "                     'lung_expression': 'Lung', \n",
    "                     'minor_salivary_gland_expression': 'Minor salivary gland', \n",
    "                     'muscle_expression': 'Muscle', \n",
    "                     'nerve_expression': 'Nerve', \n",
    "                     'ovary_expression': 'Ovary', \n",
    "                     'pancreas_expression': 'Pancreas', \n",
    "                     'pituitary_expression': 'Pituitary', \n",
    "                     'prostate_expression': 'Prostate', \n",
    "                     'skin_expression': 'Skin', \n",
    "                     'small_intestine_expression': 'Small intestine', \n",
    "                     'spleen_expression': 'Spleen', \n",
    "                     'stomach_expression': 'Stomach', \n",
    "                     'testis_expression': 'Testis', \n",
    "                     'thyroid_expression': 'Thyroid', \n",
    "                     'uterus_expression': 'Uterus', \n",
    "                     'vagina_expression': 'Vagina', \n",
    "                     'tissue_expression': \"# tissues expressed\",\n",
    "                     'active_gene_distance': 'Distance to expressed gene',\n",
    "                     'fraction_A': \"A compartment overlap\",\n",
    "                     'fraction_B': \"B compartment overlap\",\n",
    "                     'fraction_unassigned': \"Unassigned compartment overlap\",\n",
    "                     'pLI': 'pLI', \n",
    "                     'pNull': 'Probability of complete haplosufficiency', \n",
    "                     'pRec': 'Probability of recessive lethality', \n",
    "                     'gerp_element_per_bp': '# conserved elements per bp',\n",
    "                     'tad_distance': 'Min. TAD boundary distance', \n",
    "                      'phylop_mean': \"Mean gene body conservation\",\n",
    "                     'omim_gene': 'OMIM', \n",
    "                     \"protein_coding\": \"Protein-coding\"\n",
    "                    }\n",
    "\n",
    "enrich_logr_results_df[\"feature_name\"] = enrich_logr_results_df.feature.replace(feature_name_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "5dfb3992",
   "metadata": {},
   "outputs": [],
   "source": [
    "# feature groups \n",
    "feature_groups_dict = {\"Expression\": [\"adipose_expression\", \"adrenal_gland_expression\",\n",
    "                                      \"artery_expression\", \"bladder_expression\", \"brain_expression\", \n",
    "                                      \"breast_expression\", \"cervix_expression\", \"colon_expression\", \n",
    "                                      \"esophagus_expression\", \"fallopian_tube_expression\", \"heart_expression\",\n",
    "                                      \"kidney_expression\", \"liver_expression\", \"lung_expression\", \n",
    "                                      \"minor_salivary_gland_expression\", \"muscle_expression\", \n",
    "                                      \"nerve_expression\", \"ovary_expression\", \"pancreas_expression\", \n",
    "                                      \"pituitary_expression\", \"prostate_expression\", \"skin_expression\", \n",
    "                                      \"spleen_expression\", \"stomach_expression\", \"testis_expression\", \n",
    "                                      \"thyroid_expression\", \"uterus_expression\", \"vagina_expression\", \n",
    "                                      \"small_intestine_expression\", \"tissue_expression\"\n",
    "                                     ],\n",
    "                       \"Genomic\": [\"active_gene_distance\",\"active_gene_count_1Mb\", \n",
    "                                  \"gene_count_1Mb\", \"gene_length\", \"gene_distance_min\"],\n",
    "                       \"Constraint &\\nconservation\": ['pHaplo', 'pTriplo', 'EDS', 'oe_lof_upper', \n",
    "                                      'oe_mis_upper','Episcore', \"pLI\", \"pNull\", \"pRec\", \"phylop_mean\", \"gerp_element_per_bp\"],\n",
    "                       \"Regulation\": ['ProximityLinking_Conserved_nt_count', 'ProximityLinking_nt_count',\n",
    "                                      'ActivityLinking_EnhancerNumber', 'ActivityLinking_NumberConservedElements',\n",
    "                                      'ProximityLinking_EnhancerNumber','ProximityLinking_NumberConservedElements', \n",
    "                                     \"gene_fraction_TssA\", \"gene_fraction_TssAFlnk\", \"gene_fraction_TxFlnk\", \n",
    "                                      \"gene_fraction_Tx\", \"gene_fraction_TxWk\", \"gene_fraction_EnhG\", \n",
    "                                      \"gene_fraction_Enh\", \"gene_fraction_ZNFRpts\", \"gene_fraction_Het\", \n",
    "                                      \"gene_fraction_TssBiv\", \"gene_fraction_BivFlnk\", \"gene_fraction_EnhBiv\", \n",
    "                                     \"gene_fraction_ReprPC\", \"gene_fraction_ReprPCWk\", \"gene_fraction_Quies\", \n",
    "                                      \"fraction_A\", \"fraction_B\", \"fraction_unassigned\", \"ActivityLinking_Conserved_nt_count\", \n",
    "                                      \"ActivityLinking_nt_count\", \"tad_distance\"\n",
    "                                     ],\n",
    "                       \"Gene sets\": [\"gnomad_autosomal_dominant\", \"gnomad_haploinsufficient\", \n",
    "                                     \"gnomad_autosomal_recessive\", \"gnomad_olfactory_genes\", \"oncogene\", \n",
    "                                    \"approved_target\", \"decipher_gene\", \"omim_gene\", \"protein_coding\"\n",
    "                                    ]\n",
    "                      }\n",
    "features_to_group_dict = {}\n",
    "for group in feature_groups_dict.keys(): \n",
    "    for feature in feature_groups_dict[group]: \n",
    "        features_to_group_dict[feature] = group \n",
    "enrich_logr_results_df[\"feature_group\"] = enrich_logr_results_df.feature.replace(features_to_group_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "ffcd0c2b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# calculate absolute value of log-odds \n",
    "enrich_logr_results_df[\"log_odds_abs\"] = enrich_logr_results_df.log_odds.abs()\n",
    "# write all results to file \n",
    "out_dir_path.mkdir(parents=True, exist_ok=True)\n",
    "enrich_logr_results_path = out_dir_path.joinpath(\"enrich_logr_results_all_rev_zscores.csv\")\n",
    "enrich_logr_results_df.to_csv(enrich_logr_results_path, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "f8a56497",
   "metadata": {},
   "outputs": [],
   "source": [
    "# z-score > 2, passing Bonferroni correction  \n",
    "enrich_logr_results_pass_z2_df = enrich_logr_results_df[(enrich_logr_results_df[\"bonferroni_pass\"]) & \n",
    "                                                     (enrich_logr_results_df[\"z_score\"] == 2)\n",
    "                                                    ].sort_values(by=\"log_odds_abs\", ascending=False)\n",
    "# write all features \n",
    "enrich_logr_results_pass_z2_path = out_dir_path.joinpath(f\"enrich_logr_results_pass_bonf_z2_all.csv\")\n",
    "enrich_logr_results_pass_z2_df.to_csv(enrich_logr_results_pass_z2_path, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "b19500d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# show top 15 features in main figure\n",
    "top_results = 15\n",
    "enrich_logr_results_pass_z2_top_results_df = enrich_logr_results_pass_z2_df.head(top_results).copy()\n",
    "all_top_features = set(enrich_logr_results_pass_z2_top_results_df.feature.unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "75ac6fbd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of gene set features: 38\n",
      "Number of quantitative features: 44\n"
     ]
    }
   ],
   "source": [
    "feature_names_in_plot = {'Gene length':'Gene length',\n",
    "                         'gnomAD LOEUF':'gnomAD LOEUF',\n",
    "                         'OMIM':'OMIM',\n",
    "                         '# enhancers (activity-linking)': '# enhancers (activity)',\n",
    "                         '# enhancers (proximity-linking)': '# enhancers (proximity)',\n",
    "                         'Conserved enhancer bp (proximity-linking)': 'Conserved enhancer bp (proximity)',\n",
    "                         'Conserved # enhancers (proximity-linking)': 'Conserved # enhancers (proximity)',\n",
    "                         'Enhancer bp (proximity-linking)': 'Enhancer bp (proximity)',\n",
    "                         'Brain': 'Brain expression',\n",
    "                         '# tissues expressed': '# tissues expressed',\n",
    "                         'Enhancer Domain Score (EDS)': 'Enhancer Domain Score', \n",
    "                         'Episcore': 'Episcore',\n",
    "                         \"gnomAD missense OEUF\": \"gnomAD missense OEUF\", \n",
    "                         'Pituitary': 'Pituitary expression', \n",
    "                         'Heart': 'Heart expression', \n",
    "                         \"Protein-coding\": \"Protein-coding\"\n",
    "                        }\n",
    "\n",
    "\n",
    "# write results adjusting name for plot \n",
    "enrich_logr_results_pass_z2_top_results_df[\"feature_name_trunc\"] = enrich_logr_results_pass_z2_top_results_df.feature_name.replace(feature_names_in_plot)\n",
    "# feature groups \n",
    "gene_sets = feature_groups_dict[\"Expression\"] + feature_groups_dict[\"Gene sets\"]\n",
    "gene_sets.remove(\"tissue_expression\")\n",
    "print(f\"Number of gene set features: {len(gene_sets)}\")\n",
    "# quantitative features\n",
    "quant_features = feature_groups_dict[\"Regulation\"] + feature_groups_dict[\"Constraint &\\nconservation\"] + feature_groups_dict[\"Genomic\"] + [\"tissue_expression\"]\n",
    "print(f\"Number of quantitative features: {len(quant_features)}\")\n",
    "features_class_dict = {**{feature: \"Quantitative\" for feature in quant_features}, **{feature: \"Binary\" for feature in gene_sets}}\n",
    "\n",
    "enrich_logr_results_pass_z2_top_results_df[\"class\"] = enrich_logr_results_pass_z2_top_results_df.feature.replace(features_class_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "46ca4f2f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# write to file \n",
    "enrich_logr_results_pass_z2_top_results_path = out_dir_path.joinpath(f\"enrich_logr_results_pass_bonf_z2_top{top_results}.csv\")\n",
    "enrich_logr_results_pass_z2_top_results_df.to_csv(enrich_logr_results_pass_z2_top_results_path, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "3c6ed9b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# write all results, adjusting log odds where failing bonferroni correction (for heatmap)\n",
    "enrich_logr_results_df[\"log_odds_adj\"] = np.where(enrich_logr_results_df[\"bonferroni_pass\"], \n",
    "                                                  enrich_logr_results_df.log_odds, \n",
    "                                                  np.nan \n",
    "                                                 )\n",
    "enrich_logr_results_log_odds_adj_path = out_dir_path.joinpath(f\"enrich_logr_results_all_zscores_bonf_log_odds_adj.csv\")\n",
    "enrich_logr_results_df.to_csv(enrich_logr_results_log_odds_adj_path, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "95c698a4",
   "metadata": {},
   "source": [
    "### Enrichment test splitting protein-coding and lncRNA genes "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ffcc5546",
   "metadata": {},
   "source": [
    "Enrichment testing for lncRNAs only"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "af0bc682",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of features: 73\n",
      "Number of genes: 5558\n"
     ]
    }
   ],
   "source": [
    "gene_lncrna_features_df = gene_features_df[gene_features_df.protein_coding == 0].copy()\n",
    "\n",
    "# remove features that have all entries = 0\n",
    "lncrna_features_to_rmv = [\"gene_id\", \"gene_type\", \"gene_fraction_TxFlnk\", \"gnomad_autosomal_dominant\", \n",
    "                          \"gnomad_haploinsufficient\", \"gnomad_autosomal_recessive\", \"gnomad_olfactory_genes\", \n",
    "                          \"oncogene\", \"approved_target\", \"decipher_gene\", \"protein_coding\", \"misexp_group\"\n",
    "                         ]\n",
    "\n",
    "lncrna_features = [col for col in gene_lncrna_features_df.columns if col not in lncrna_features_to_rmv]\n",
    "\n",
    "print(f\"Number of features: {len(lncrna_features)}\")\n",
    "lncrna_gene_ids_with_features = gene_lncrna_features_df.gene_id.unique()\n",
    "print(f\"Number of genes: {len(lncrna_gene_ids_with_features)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "8ab00c2b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "gene_length\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.635029\n",
      "         Iterations 6\n",
      "gene_count_1Mb\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669514\n",
      "         Iterations 4\n",
      "gene_distance_min\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670562\n",
      "         Iterations 4\n",
      "tissue_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.668792\n",
      "         Iterations 4\n",
      "adipose_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670192\n",
      "         Iterations 4\n",
      "adrenal_gland_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670105\n",
      "         Iterations 4\n",
      "artery_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670187\n",
      "         Iterations 4\n",
      "bladder_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670316\n",
      "         Iterations 4\n",
      "brain_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.666524\n",
      "         Iterations 4\n",
      "breast_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670215\n",
      "         Iterations 4\n",
      "cervix_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670488\n",
      "         Iterations 4\n",
      "colon_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670591\n",
      "         Iterations 4\n",
      "esophagus_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670546\n",
      "         Iterations 4\n",
      "fallopian_tube_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670513\n",
      "         Iterations 4\n",
      "heart_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669485\n",
      "         Iterations 4\n",
      "kidney_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670510\n",
      "         Iterations 4\n",
      "liver_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670097\n",
      "         Iterations 4\n",
      "lung_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670469\n",
      "         Iterations 4\n",
      "minor_salivary_gland_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670592\n",
      "         Iterations 4\n",
      "muscle_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670236\n",
      "         Iterations 4\n",
      "nerve_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670369\n",
      "         Iterations 4\n",
      "ovary_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670225\n",
      "         Iterations 4\n",
      "pancreas_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670038\n",
      "         Iterations 4\n",
      "pituitary_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670056\n",
      "         Iterations 4\n",
      "prostate_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670371\n",
      "         Iterations 4\n",
      "skin_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670588\n",
      "         Iterations 4\n",
      "small_intestine_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670519\n",
      "         Iterations 4\n",
      "spleen_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669932\n",
      "         Iterations 4\n",
      "stomach_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670382\n",
      "         Iterations 4\n",
      "testis_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670541\n",
      "         Iterations 4\n",
      "thyroid_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670583\n",
      "         Iterations 4\n",
      "uterus_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670577\n",
      "         Iterations 4\n",
      "vagina_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670581\n",
      "         Iterations 4\n",
      "active_gene_distance\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669782\n",
      "         Iterations 4\n",
      "active_gene_count_1Mb\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.666491\n",
      "         Iterations 5\n",
      "gene_fraction_TssA\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669165\n",
      "         Iterations 7\n",
      "gene_fraction_TssAFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669080\n",
      "         Iterations 6\n",
      "gene_fraction_Tx\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669275\n",
      "         Iterations 5\n",
      "gene_fraction_TxWk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.668119\n",
      "         Iterations 5\n",
      "gene_fraction_EnhG\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670569\n",
      "         Iterations 4\n",
      "gene_fraction_Enh\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669299\n",
      "         Iterations 6\n",
      "gene_fraction_ZNFRpts\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670475\n",
      "         Iterations 4\n",
      "gene_fraction_Het\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670587\n",
      "         Iterations 4\n",
      "gene_fraction_TssBiv\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670582\n",
      "         Iterations 4\n",
      "gene_fraction_BivFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669797\n",
      "         Iterations 5\n",
      "gene_fraction_EnhBiv\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670231\n",
      "         Iterations 5\n",
      "gene_fraction_ReprPC\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670582\n",
      "         Iterations 4\n",
      "gene_fraction_ReprPCWk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670545\n",
      "         Iterations 4\n",
      "gene_fraction_Quies\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669695\n",
      "         Iterations 4\n",
      "fraction_A\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.665868\n",
      "         Iterations 4\n",
      "fraction_B\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.667353\n",
      "         Iterations 4\n",
      "fraction_unassigned\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.668232\n",
      "         Iterations 4\n",
      "EDS\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688415\n",
      "         Iterations 4\n",
      "ActivityLinking_Conserved_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688984\n",
      "         Iterations 4\n",
      "ActivityLinking_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688058\n",
      "         Iterations 4\n",
      "ProximityLinking_Conserved_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687356\n",
      "         Iterations 6\n",
      "ProximityLinking_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.690123\n",
      "         Iterations 3\n",
      "ActivityLinking_EnhancerNumber\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.676588\n",
      "         Iterations 5\n",
      "ActivityLinking_NumberConservedElements\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.686653\n",
      "         Iterations 4\n",
      "ProximityLinking_EnhancerNumber\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.690123\n",
      "         Iterations 3\n",
      "ProximityLinking_NumberConservedElements\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687970\n",
      "         Iterations 5\n",
      "Episcore\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.682745\n",
      "         Iterations 4\n",
      "pHaplo\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.668160\n",
      "         Iterations 4\n",
      "pTriplo\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.675611\n",
      "         Iterations 4\n",
      "pLI\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688156\n",
      "         Iterations 4\n",
      "pNull\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691575\n",
      "         Iterations 4\n",
      "pRec\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.693024\n",
      "         Iterations 3\n",
      "oe_lof_upper\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.690339\n",
      "         Iterations 4\n",
      "oe_mis_upper\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692019\n",
      "         Iterations 4\n",
      "gerp_element_per_bp\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669430\n",
      "         Iterations 4\n",
      "tad_distance\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669676\n",
      "         Iterations 4\n",
      "phylop_mean\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670615\n",
      "         Iterations 4\n",
      "omim_gene\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.668648\n",
      "         Iterations 4\n",
      "gene_length\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.635029\n",
      "         Iterations 6\n",
      "gene_count_1Mb\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669514\n",
      "         Iterations 4\n",
      "gene_distance_min\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670562\n",
      "         Iterations 4\n",
      "tissue_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.668792\n",
      "         Iterations 4\n",
      "adipose_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670192\n",
      "         Iterations 4\n",
      "adrenal_gland_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670105\n",
      "         Iterations 4\n",
      "artery_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670187\n",
      "         Iterations 4\n",
      "bladder_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670316\n",
      "         Iterations 4\n",
      "brain_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.666524\n",
      "         Iterations 4\n",
      "breast_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670215\n",
      "         Iterations 4\n",
      "cervix_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670488\n",
      "         Iterations 4\n",
      "colon_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670591\n",
      "         Iterations 4\n",
      "esophagus_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670546\n",
      "         Iterations 4\n",
      "fallopian_tube_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670513\n",
      "         Iterations 4\n",
      "heart_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669485\n",
      "         Iterations 4\n",
      "kidney_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670510\n",
      "         Iterations 4\n",
      "liver_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670097\n",
      "         Iterations 4\n",
      "lung_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670469\n",
      "         Iterations 4\n",
      "minor_salivary_gland_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670592\n",
      "         Iterations 4\n",
      "muscle_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670236\n",
      "         Iterations 4\n",
      "nerve_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670369\n",
      "         Iterations 4\n",
      "ovary_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670225\n",
      "         Iterations 4\n",
      "pancreas_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670038\n",
      "         Iterations 4\n",
      "pituitary_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670056\n",
      "         Iterations 4\n",
      "prostate_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670371\n",
      "         Iterations 4\n",
      "skin_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670588\n",
      "         Iterations 4\n",
      "small_intestine_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670519\n",
      "         Iterations 4\n",
      "spleen_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669932\n",
      "         Iterations 4\n",
      "stomach_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670382\n",
      "         Iterations 4\n",
      "testis_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670541\n",
      "         Iterations 4\n",
      "thyroid_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670583\n",
      "         Iterations 4\n",
      "uterus_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670577\n",
      "         Iterations 4\n",
      "vagina_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670581\n",
      "         Iterations 4\n",
      "active_gene_distance\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669782\n",
      "         Iterations 4\n",
      "active_gene_count_1Mb\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.666491\n",
      "         Iterations 5\n",
      "gene_fraction_TssA\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669165\n",
      "         Iterations 7\n",
      "gene_fraction_TssAFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669080\n",
      "         Iterations 6\n",
      "gene_fraction_Tx\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669275\n",
      "         Iterations 5\n",
      "gene_fraction_TxWk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.668119\n",
      "         Iterations 5\n",
      "gene_fraction_EnhG\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670569\n",
      "         Iterations 4\n",
      "gene_fraction_Enh\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669299\n",
      "         Iterations 6\n",
      "gene_fraction_ZNFRpts\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670475\n",
      "         Iterations 4\n",
      "gene_fraction_Het\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670587\n",
      "         Iterations 4\n",
      "gene_fraction_TssBiv\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670582\n",
      "         Iterations 4\n",
      "gene_fraction_BivFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669797\n",
      "         Iterations 5\n",
      "gene_fraction_EnhBiv\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670231\n",
      "         Iterations 5\n",
      "gene_fraction_ReprPC\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670582\n",
      "         Iterations 4\n",
      "gene_fraction_ReprPCWk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670545\n",
      "         Iterations 4\n",
      "gene_fraction_Quies\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669695\n",
      "         Iterations 4\n",
      "fraction_A\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.665868\n",
      "         Iterations 4\n",
      "fraction_B\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.667353\n",
      "         Iterations 4\n",
      "fraction_unassigned\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.668232\n",
      "         Iterations 4\n",
      "EDS\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688415\n",
      "         Iterations 4\n",
      "ActivityLinking_Conserved_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688984\n",
      "         Iterations 4\n",
      "ActivityLinking_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688058\n",
      "         Iterations 4\n",
      "ProximityLinking_Conserved_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687356\n",
      "         Iterations 6\n",
      "ProximityLinking_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.690123\n",
      "         Iterations 3\n",
      "ActivityLinking_EnhancerNumber\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.676588\n",
      "         Iterations 5\n",
      "ActivityLinking_NumberConservedElements\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.686653\n",
      "         Iterations 4\n",
      "ProximityLinking_EnhancerNumber\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.690123\n",
      "         Iterations 3\n",
      "ProximityLinking_NumberConservedElements\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687970\n",
      "         Iterations 5\n",
      "Episcore\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.682745\n",
      "         Iterations 4\n",
      "pHaplo\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.668160\n",
      "         Iterations 4\n",
      "pTriplo\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.675611\n",
      "         Iterations 4\n",
      "pLI\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688156\n",
      "         Iterations 4\n",
      "pNull\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691575\n",
      "         Iterations 4\n",
      "pRec\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.693024\n",
      "         Iterations 3\n",
      "oe_lof_upper\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.690339\n",
      "         Iterations 4\n",
      "oe_mis_upper\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.692019\n",
      "         Iterations 4\n",
      "gerp_element_per_bp\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669430\n",
      "         Iterations 4\n",
      "tad_distance\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.669676\n",
      "         Iterations 4\n",
      "phylop_mean\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.670615\n",
      "         Iterations 4\n",
      "omim_gene\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.668648\n",
      "         Iterations 4\n",
      "gene_length\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.659531\n",
      "         Iterations 6\n",
      "gene_count_1Mb\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689536\n",
      "         Iterations 4\n",
      "gene_distance_min\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689489\n",
      "         Iterations 4\n",
      "tissue_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.684568\n",
      "         Iterations 4\n",
      "adipose_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687627\n",
      "         Iterations 4\n",
      "adrenal_gland_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687515\n",
      "         Iterations 4\n",
      "artery_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688348\n",
      "         Iterations 4\n",
      "bladder_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688329\n",
      "         Iterations 4\n",
      "brain_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.682777\n",
      "         Iterations 4\n",
      "breast_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687055\n",
      "         Iterations 4\n",
      "cervix_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688449\n",
      "         Iterations 4\n",
      "colon_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689077\n",
      "         Iterations 4\n",
      "esophagus_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688861\n",
      "         Iterations 4\n",
      "fallopian_tube_expression\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688059\n",
      "         Iterations 4\n",
      "heart_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.686949\n",
      "         Iterations 4\n",
      "kidney_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688701\n",
      "         Iterations 4\n",
      "liver_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688731\n",
      "         Iterations 4\n",
      "lung_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687800\n",
      "         Iterations 4\n",
      "minor_salivary_gland_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688982\n",
      "         Iterations 4\n",
      "muscle_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687910\n",
      "         Iterations 4\n",
      "nerve_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688020\n",
      "         Iterations 4\n",
      "ovary_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687475\n",
      "         Iterations 4\n",
      "pancreas_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688615\n",
      "         Iterations 4\n",
      "pituitary_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687165\n",
      "         Iterations 4\n",
      "prostate_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688502\n",
      "         Iterations 4\n",
      "skin_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689404\n",
      "         Iterations 3\n",
      "small_intestine_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688931\n",
      "         Iterations 4\n",
      "spleen_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687934\n",
      "         Iterations 4\n",
      "stomach_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688416\n",
      "         Iterations 4\n",
      "testis_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689696\n",
      "         Iterations 3\n",
      "thyroid_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688709\n",
      "         Iterations 4\n",
      "uterus_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688384\n",
      "         Iterations 4\n",
      "vagina_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689077\n",
      "         Iterations 4\n",
      "active_gene_distance\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689741\n",
      "         Iterations 3\n",
      "active_gene_count_1Mb\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689628\n",
      "         Iterations 4\n",
      "gene_fraction_TssA\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689538\n",
      "         Iterations 5\n",
      "gene_fraction_TssAFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689577\n",
      "         Iterations 4\n",
      "gene_fraction_Tx\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689744\n",
      "         Iterations 3\n",
      "gene_fraction_TxWk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689666\n",
      "         Iterations 4\n",
      "gene_fraction_EnhG\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689727\n",
      "         Iterations 4\n",
      "gene_fraction_Enh\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689587\n",
      "         Iterations 4\n",
      "gene_fraction_ZNFRpts\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689326\n",
      "         Iterations 6\n",
      "gene_fraction_Het\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689708\n",
      "         Iterations 3\n",
      "gene_fraction_TssBiv\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689674\n",
      "         Iterations 4\n",
      "gene_fraction_BivFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688890\n",
      "         Iterations 5\n",
      "gene_fraction_EnhBiv\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689327\n",
      "         Iterations 5\n",
      "gene_fraction_ReprPC\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689709\n",
      "         Iterations 4\n",
      "gene_fraction_ReprPCWk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689745\n",
      "         Iterations 3\n",
      "gene_fraction_Quies\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689703\n",
      "         Iterations 3\n",
      "fraction_A\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688964\n",
      "         Iterations 4\n",
      "fraction_B\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689419\n",
      "         Iterations 4\n",
      "fraction_unassigned\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.687959\n",
      "         Iterations 5\n",
      "EDS\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.675038\n",
      "         Iterations 5\n",
      "ActivityLinking_Conserved_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.676405\n",
      "         Iterations 4\n",
      "ActivityLinking_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.676254\n",
      "         Iterations 4\n",
      "ProximityLinking_Conserved_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.675764\n",
      "         Iterations 6\n",
      "ProximityLinking_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.677963\n",
      "         Iterations 4\n",
      "ActivityLinking_EnhancerNumber\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.668133\n",
      "         Iterations 5\n",
      "ActivityLinking_NumberConservedElements\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.674646\n",
      "         Iterations 4\n",
      "ProximityLinking_EnhancerNumber\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.678326\n",
      "         Iterations 4\n",
      "ProximityLinking_NumberConservedElements\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.676134\n",
      "         Iterations 6\n",
      "Episcore\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.675447\n",
      "         Iterations 5\n",
      "pHaplo\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688194\n",
      "         Iterations 4\n",
      "pTriplo\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.691001\n",
      "         Iterations 4\n",
      "pLI\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.675099\n",
      "         Iterations 4\n",
      "pNull\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.678338\n",
      "         Iterations 4\n",
      "pRec\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.683668\n",
      "         Iterations 4\n",
      "oe_lof_upper\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.678860\n",
      "         Iterations 4\n",
      "oe_mis_upper\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.685425\n",
      "         Iterations 4\n",
      "gerp_element_per_bp\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689483\n",
      "         Iterations 4\n",
      "tad_distance\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689601\n",
      "         Iterations 3\n",
      "phylop_mean\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.689682\n",
      "         Iterations 3\n",
      "omim_gene\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.688216\n",
      "         Iterations 5\n",
      "gene_length\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.638215\n",
      "         Iterations 7\n",
      "gene_count_1Mb\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.653241\n",
      "         Iterations 5\n",
      "gene_distance_min\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.655117\n",
      "         Iterations 4\n",
      "tissue_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.648317\n",
      "         Iterations 5\n",
      "adipose_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.653697\n",
      "         Iterations 5\n",
      "adrenal_gland_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.654144\n",
      "         Iterations 5\n",
      "artery_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.653691\n",
      "         Iterations 5\n",
      "bladder_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.654177\n",
      "         Iterations 5\n",
      "brain_expression\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 0.648787\n",
      "         Iterations 5\n",
      "breast_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.653152\n",
      "         Iterations 5\n",
      "cervix_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.653532\n",
      "         Iterations 5\n",
      "colon_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.654213\n",
      "         Iterations 5\n",
      "esophagus_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.654105\n",
      "         Iterations 5\n",
      "fallopian_tube_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.653221\n",
      "         Iterations 5\n",
      "heart_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.653528\n",
      "         Iterations 5\n",
      "kidney_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.655226\n",
      "         Iterations 5\n",
      "liver_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.655638\n",
      "         Iterations 5\n",
      "lung_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.653492\n",
      "         Iterations 5\n",
      "minor_salivary_gland_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.655147\n",
      "         Iterations 5\n",
      "muscle_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.654097\n",
      "         Iterations 5\n",
      "nerve_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.652213\n",
      "         Iterations 5\n",
      "ovary_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.653770\n",
      "         Iterations 5\n",
      "pancreas_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.654007\n",
      "         Iterations 5\n",
      "pituitary_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.653075\n",
      "         Iterations 5\n",
      "prostate_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.654176\n",
      "         Iterations 5\n",
      "skin_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.655240\n",
      "         Iterations 5\n",
      "small_intestine_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.653657\n",
      "         Iterations 5\n",
      "spleen_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.655241\n",
      "         Iterations 5\n",
      "stomach_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.654495\n",
      "         Iterations 5\n",
      "testis_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.656768\n",
      "         Iterations 4\n",
      "thyroid_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.654018\n",
      "         Iterations 5\n",
      "uterus_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.652167\n",
      "         Iterations 5\n",
      "vagina_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.654522\n",
      "         Iterations 5\n",
      "active_gene_distance\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.653557\n",
      "         Iterations 4\n",
      "active_gene_count_1Mb\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.653474\n",
      "         Iterations 5\n",
      "gene_fraction_TssA\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.656266\n",
      "         Iterations 6\n",
      "gene_fraction_TssAFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.656046\n",
      "         Iterations 6\n",
      "gene_fraction_Tx\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.655142\n",
      "         Iterations 6\n",
      "gene_fraction_TxWk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.653416\n",
      "         Iterations 5\n",
      "gene_fraction_EnhG\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.655645\n",
      "         Iterations 11\n",
      "gene_fraction_Enh\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.656122\n",
      "         Iterations 5\n",
      "gene_fraction_ZNFRpts\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.655957\n",
      "         Iterations 9\n",
      "gene_fraction_Het\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.656707\n",
      "         Iterations 4\n",
      "gene_fraction_TssBiv\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.656767\n",
      "         Iterations 4\n",
      "gene_fraction_BivFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.656768\n",
      "         Iterations 4\n",
      "gene_fraction_EnhBiv\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.656544\n",
      "         Iterations 4\n",
      "gene_fraction_ReprPC\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.656741\n",
      "         Iterations 4\n",
      "gene_fraction_ReprPCWk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.656581\n",
      "         Iterations 4\n",
      "gene_fraction_Quies\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.655990\n",
      "         Iterations 4\n",
      "fraction_A\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.655326\n",
      "         Iterations 4\n",
      "fraction_B\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.654629\n",
      "         Iterations 4\n",
      "fraction_unassigned\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.655266\n",
      "         Iterations 5\n",
      "EDS\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.555106\n",
      "         Iterations 5\n",
      "ActivityLinking_Conserved_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.552754\n",
      "         Iterations 5\n",
      "ActivityLinking_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.555107\n",
      "         Iterations 5\n",
      "ProximityLinking_Conserved_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.554028\n",
      "         Iterations 6\n",
      "ProximityLinking_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.552402\n",
      "         Iterations 6\n",
      "ActivityLinking_EnhancerNumber\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.554602\n",
      "         Iterations 5\n",
      "ActivityLinking_NumberConservedElements\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.554898\n",
      "         Iterations 5\n",
      "ProximityLinking_EnhancerNumber\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.554727\n",
      "         Iterations 5\n",
      "ProximityLinking_NumberConservedElements\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.553903\n",
      "         Iterations 6\n",
      "Episcore\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.552890\n",
      "         Iterations 5\n",
      "pHaplo\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.601610\n",
      "         Iterations 5\n",
      "pTriplo\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.589196\n",
      "         Iterations 5\n",
      "pLI\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.573694\n",
      "         Iterations 5\n",
      "pNull\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.571621\n",
      "         Iterations 5\n",
      "pRec\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.572037\n",
      "         Iterations 5\n",
      "oe_lof_upper\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.574358\n",
      "         Iterations 5\n",
      "oe_mis_upper\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.560296\n",
      "         Iterations 5\n",
      "gerp_element_per_bp\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.656767\n",
      "         Iterations 4\n",
      "tad_distance\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.656357\n",
      "         Iterations 4\n",
      "phylop_mean\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.656665\n",
      "         Iterations 4\n",
      "omim_gene\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.655564\n",
      "         Iterations 5\n"
     ]
    }
   ],
   "source": [
    "lncrna_enrich_logr_results, count = {}, 0\n",
    "for z_score in [2, 10, 20, 30]:\n",
    "    gene_misexp_gene_path = wkdir_path.joinpath(f\"2_misexp_qc/misexp_metrics/misexp_genes_tpm0.5_z{z_score}.txt\")\n",
    "    gene_misexp = set(pd.read_csv(gene_misexp_gene_path, sep=\"\\t\", header=None)[0].tolist())\n",
    "    gene_never_misexp_path = wkdir_path.joinpath(f\"2_misexp_qc/misexp_metrics/never_misexp_genes_tpm0.5_z{z_score}.txt\")\n",
    "    gene_never_misexp = set(pd.read_csv(gene_never_misexp_path, sep=\"\\t\", header=None)[0].tolist())\n",
    "    # categorical variable: misexpressed, never misexpressed \n",
    "    gene_lncrna_features_df[\"misexp_group\"] = np.where(gene_lncrna_features_df.gene_id.isin(gene_never_misexp), 0, 1)\n",
    "    # logistic regression \n",
    "    for feature in lncrna_features:\n",
    "        print(feature)\n",
    "        input_df = gene_lncrna_features_df.copy()\n",
    "        input_df[f\"{feature}_norm\"] = (input_df[feature] - input_df[feature].mean())/input_df[feature].std()\n",
    "        y, X = dmatrices(f\"misexp_group ~ {feature}_norm\", input_df, return_type = 'dataframe')\n",
    "        logit_fit = discrete_model.Logit(endog=y, exog=X).fit()\n",
    "        log_odds, pval = logit_fit.params[1], logit_fit.pvalues[1]\n",
    "        # normal approximation confidence intervals\n",
    "        lower_conf = logit_fit.conf_int(alpha=0.05)[0][1]\n",
    "        upper_conf = logit_fit.conf_int(alpha=0.05)[1][1]\n",
    "        lncrna_enrich_logr_results[count] = [z_score, feature, log_odds, lower_conf, upper_conf, pval]\n",
    "        count += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "0adcf9d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# results \n",
    "lncrna_enrich_logr_results_df = pd.DataFrame.from_dict(lncrna_enrich_logr_results, \n",
    "                                                orient=\"index\", \n",
    "                                                columns=[\"z_score\", \"feature\", \"log_odds\", \"lower\", \"upper\", \"pval\"])\n",
    "# multiple testing correction \n",
    "pval_as_array = lncrna_enrich_logr_results_df.pval.to_numpy()\n",
    "# FDR BH and Bonferroni correction \n",
    "for method in [\"fdr_bh\", \"bonferroni\"]:\n",
    "    pass_test, pval_adj, _, _ = multitest.multipletests(pval_as_array, alpha=0.05, method=method)\n",
    "    lncrna_enrich_logr_results_df[f\"{method}_pass\"] = pass_test\n",
    "    lncrna_enrich_logr_results_df[f\"{method}_pval_adj\"] = pval_adj\n",
    "lncrna_enrich_logr_results_df[\"feature_name\"] = lncrna_enrich_logr_results_df.feature.replace(feature_name_dict)\n",
    "# add feature group \n",
    "features_to_group_dict = {}\n",
    "for group in feature_groups_dict.keys(): \n",
    "    for feature in feature_groups_dict[group]: \n",
    "        features_to_group_dict[feature] = group \n",
    "lncrna_enrich_logr_results_df[\"feature_group\"] = lncrna_enrich_logr_results_df.feature.replace(features_to_group_dict)\n",
    "# calculate absolute value of log-odds \n",
    "lncrna_enrich_logr_results_df[\"log_odds_abs\"] = lncrna_enrich_logr_results_df.log_odds.abs()\n",
    "# z-score > 2, passing Bonferroni correction  \n",
    "lncrna_enrich_logr_results_pass_z2_df = lncrna_enrich_logr_results_df[(lncrna_enrich_logr_results_df.bonferroni_pass) & \n",
    "                                                                      (lncrna_enrich_logr_results_df[\"z_score\"] == 2)\n",
    "                                                                     ].sort_values(by=\"log_odds_abs\", ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "cce6d633",
   "metadata": {},
   "outputs": [],
   "source": [
    "# show top 15 features in main figure\n",
    "lncrna_enrich_logr_results_pass_z2_top_results_df = lncrna_enrich_logr_results_pass_z2_df.head(top_results).copy()\n",
    "lncrna_top_features = set(lncrna_enrich_logr_results_pass_z2_top_results_df.feature.unique())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "80fb87b2",
   "metadata": {},
   "source": [
    "Enrichment testing for protein-coding genes "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "d99fa858",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of features: 81\n",
      "Number of genes: 3092\n"
     ]
    }
   ],
   "source": [
    "gene_features_df = pd.read_csv(gene_features_path)\n",
    "gene_protein_features_df = gene_features_df[gene_features_df.protein_coding == 1].copy()\n",
    "\n",
    "# remove features that have all entries = 0\n",
    "protein_features_to_rmv = [\"gene_id\", \"gene_type\", \"protein_coding\", \"misexp_group\"]\n",
    "\n",
    "protein_features = [col for col in gene_protein_features_df.columns if col not in protein_features_to_rmv]\n",
    "\n",
    "print(f\"Number of features: {len(protein_features)}\")\n",
    "protein_gene_ids_with_features = gene_protein_features_df.gene_id.unique()\n",
    "print(f\"Number of genes: {len(protein_gene_ids_with_features)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "080cf27a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "gene_length\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.623480\n",
      "         Iterations 7\n",
      "gene_count_1Mb\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.634177\n",
      "         Iterations 5\n",
      "gene_distance_min\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644017\n",
      "         Iterations 4\n",
      "tissue_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.642521\n",
      "         Iterations 5\n",
      "adipose_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644026\n",
      "         Iterations 4\n",
      "adrenal_gland_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644684\n",
      "         Iterations 4\n",
      "artery_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644437\n",
      "         Iterations 4\n",
      "bladder_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644333\n",
      "         Iterations 4\n",
      "brain_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.641233\n",
      "         Iterations 5\n",
      "breast_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644211\n",
      "         Iterations 4\n",
      "cervix_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643567\n",
      "         Iterations 4\n",
      "colon_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643628\n",
      "         Iterations 4\n",
      "esophagus_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644956\n",
      "         Iterations 4\n",
      "fallopian_tube_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643461\n",
      "         Iterations 4\n",
      "heart_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.642721\n",
      "         Iterations 5\n",
      "kidney_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.642600\n",
      "         Iterations 5\n",
      "liver_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643494\n",
      "         Iterations 5\n",
      "lung_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644496\n",
      "         Iterations 4\n",
      "minor_salivary_gland_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644970\n",
      "         Iterations 4\n",
      "muscle_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644160\n",
      "         Iterations 5\n",
      "nerve_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643778\n",
      "         Iterations 4\n",
      "ovary_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644118\n",
      "         Iterations 4\n",
      "pancreas_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644411\n",
      "         Iterations 4\n",
      "pituitary_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.642199\n",
      "         Iterations 5\n",
      "prostate_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643854\n",
      "         Iterations 4\n",
      "skin_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644282\n",
      "         Iterations 4\n",
      "small_intestine_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644811\n",
      "         Iterations 4\n",
      "spleen_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644958\n",
      "         Iterations 4\n",
      "stomach_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644585\n",
      "         Iterations 4\n",
      "testis_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643948\n",
      "         Iterations 4\n",
      "thyroid_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643464\n",
      "         Iterations 5\n",
      "uterus_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643932\n",
      "         Iterations 4\n",
      "vagina_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644799\n",
      "         Iterations 4\n",
      "active_gene_distance\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643073\n",
      "         Iterations 5\n",
      "active_gene_count_1Mb\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.636649\n",
      "         Iterations 5\n",
      "gene_fraction_TssA\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644951\n",
      "         Iterations 4\n",
      "gene_fraction_TssAFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644953\n",
      "         Iterations 4\n",
      "gene_fraction_TxFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644969\n",
      "         Iterations 4\n",
      "gene_fraction_Tx\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644924\n",
      "         Iterations 4\n",
      "gene_fraction_TxWk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643416\n",
      "         Iterations 4\n",
      "gene_fraction_EnhG\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644896\n",
      "         Iterations 6\n",
      "gene_fraction_Enh\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.642964\n",
      "         Iterations 5\n",
      "gene_fraction_ZNFRpts\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644924\n",
      "         Iterations 6\n",
      "gene_fraction_Het\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644353\n",
      "         Iterations 5\n",
      "gene_fraction_TssBiv\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644478\n",
      "         Iterations 4\n",
      "gene_fraction_BivFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644481\n",
      "         Iterations 4\n",
      "gene_fraction_EnhBiv\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644627\n",
      "         Iterations 4\n",
      "gene_fraction_ReprPC\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644536\n",
      "         Iterations 4\n",
      "gene_fraction_ReprPCWk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643279\n",
      "         Iterations 4\n",
      "gene_fraction_Quies\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.641766\n",
      "         Iterations 4\n",
      "fraction_A\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.635949\n",
      "         Iterations 5\n",
      "fraction_B\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.636995\n",
      "         Iterations 5\n",
      "fraction_unassigned\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644272\n",
      "         Iterations 5\n",
      "gnomad_autosomal_dominant\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644969\n",
      "         Iterations 4\n",
      "gnomad_haploinsufficient\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644623\n",
      "         Iterations 5\n",
      "gnomad_autosomal_recessive\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643566\n",
      "         Iterations 5\n",
      "gnomad_olfactory_genes\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644970\n",
      "         Iterations 4\n",
      "oncogene\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644727\n",
      "         Iterations 5\n",
      "EDS\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.632106\n",
      "         Iterations 5\n",
      "ActivityLinking_Conserved_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.637115\n",
      "         Iterations 5\n",
      "ActivityLinking_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.641068\n",
      "         Iterations 4\n",
      "ProximityLinking_Conserved_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.628829\n",
      "         Iterations 6\n",
      "ProximityLinking_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.629598\n",
      "         Iterations 5\n",
      "ActivityLinking_EnhancerNumber\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.634714\n",
      "         Iterations 5\n",
      "ActivityLinking_NumberConservedElements\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.637787\n",
      "         Iterations 5\n",
      "ProximityLinking_EnhancerNumber\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.625156\n",
      "         Iterations 5\n",
      "ProximityLinking_NumberConservedElements\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.627481\n",
      "         Iterations 6\n",
      "Episcore\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.633633\n",
      "         Iterations 5\n",
      "pHaplo\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.633325\n",
      "         Iterations 5\n",
      "pTriplo\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.635907\n",
      "         Iterations 5\n",
      "pLI\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.631815\n",
      "         Iterations 5\n",
      "pNull\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.635021\n",
      "         Iterations 4\n",
      "pRec\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.635879\n",
      "         Iterations 4\n",
      "oe_lof_upper\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.612370\n",
      "         Iterations 5\n",
      "oe_mis_upper\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.631497\n",
      "         Iterations 5\n",
      "gerp_element_per_bp\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644716\n",
      "         Iterations 4\n",
      "tad_distance\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643607\n",
      "         Iterations 5\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "phylop_mean\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644970\n",
      "         Iterations 4\n",
      "approved_target\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.641278\n",
      "         Iterations 5\n",
      "decipher_gene\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.642596\n",
      "         Iterations 5\n",
      "omim_gene\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643091\n",
      "         Iterations 4\n",
      "gene_length\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.623480\n",
      "         Iterations 7\n",
      "gene_count_1Mb\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.634177\n",
      "         Iterations 5\n",
      "gene_distance_min\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644017\n",
      "         Iterations 4\n",
      "tissue_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.642521\n",
      "         Iterations 5\n",
      "adipose_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644026\n",
      "         Iterations 4\n",
      "adrenal_gland_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644684\n",
      "         Iterations 4\n",
      "artery_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644437\n",
      "         Iterations 4\n",
      "bladder_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644333\n",
      "         Iterations 4\n",
      "brain_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.641233\n",
      "         Iterations 5\n",
      "breast_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644211\n",
      "         Iterations 4\n",
      "cervix_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643567\n",
      "         Iterations 4\n",
      "colon_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643628\n",
      "         Iterations 4\n",
      "esophagus_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644956\n",
      "         Iterations 4\n",
      "fallopian_tube_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643461\n",
      "         Iterations 4\n",
      "heart_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.642721\n",
      "         Iterations 5\n",
      "kidney_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.642600\n",
      "         Iterations 5\n",
      "liver_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643494\n",
      "         Iterations 5\n",
      "lung_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644496\n",
      "         Iterations 4\n",
      "minor_salivary_gland_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644970\n",
      "         Iterations 4\n",
      "muscle_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644160\n",
      "         Iterations 5\n",
      "nerve_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643778\n",
      "         Iterations 4\n",
      "ovary_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644118\n",
      "         Iterations 4\n",
      "pancreas_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644411\n",
      "         Iterations 4\n",
      "pituitary_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.642199\n",
      "         Iterations 5\n",
      "prostate_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643854\n",
      "         Iterations 4\n",
      "skin_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644282\n",
      "         Iterations 4\n",
      "small_intestine_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644811\n",
      "         Iterations 4\n",
      "spleen_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644958\n",
      "         Iterations 4\n",
      "stomach_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644585\n",
      "         Iterations 4\n",
      "testis_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643948\n",
      "         Iterations 4\n",
      "thyroid_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643464\n",
      "         Iterations 5\n",
      "uterus_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643932\n",
      "         Iterations 4\n",
      "vagina_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644799\n",
      "         Iterations 4\n",
      "active_gene_distance\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643073\n",
      "         Iterations 5\n",
      "active_gene_count_1Mb\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.636649\n",
      "         Iterations 5\n",
      "gene_fraction_TssA\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644951\n",
      "         Iterations 4\n",
      "gene_fraction_TssAFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644953\n",
      "         Iterations 4\n",
      "gene_fraction_TxFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644969\n",
      "         Iterations 4\n",
      "gene_fraction_Tx\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644924\n",
      "         Iterations 4\n",
      "gene_fraction_TxWk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643416\n",
      "         Iterations 4\n",
      "gene_fraction_EnhG\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644896\n",
      "         Iterations 6\n",
      "gene_fraction_Enh\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.642964\n",
      "         Iterations 5\n",
      "gene_fraction_ZNFRpts\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644924\n",
      "         Iterations 6\n",
      "gene_fraction_Het\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644353\n",
      "         Iterations 5\n",
      "gene_fraction_TssBiv\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644478\n",
      "         Iterations 4\n",
      "gene_fraction_BivFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644481\n",
      "         Iterations 4\n",
      "gene_fraction_EnhBiv\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644627\n",
      "         Iterations 4\n",
      "gene_fraction_ReprPC\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644536\n",
      "         Iterations 4\n",
      "gene_fraction_ReprPCWk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643279\n",
      "         Iterations 4\n",
      "gene_fraction_Quies\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.641766\n",
      "         Iterations 4\n",
      "fraction_A\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.635949\n",
      "         Iterations 5\n",
      "fraction_B\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.636995\n",
      "         Iterations 5\n",
      "fraction_unassigned\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644272\n",
      "         Iterations 5\n",
      "gnomad_autosomal_dominant\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644969\n",
      "         Iterations 4\n",
      "gnomad_haploinsufficient\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644623\n",
      "         Iterations 5\n",
      "gnomad_autosomal_recessive\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643566\n",
      "         Iterations 5\n",
      "gnomad_olfactory_genes\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644970\n",
      "         Iterations 4\n",
      "oncogene\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644727\n",
      "         Iterations 5\n",
      "EDS\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.632106\n",
      "         Iterations 5\n",
      "ActivityLinking_Conserved_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.637115\n",
      "         Iterations 5\n",
      "ActivityLinking_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.641068\n",
      "         Iterations 4\n",
      "ProximityLinking_Conserved_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.628829\n",
      "         Iterations 6\n",
      "ProximityLinking_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.629598\n",
      "         Iterations 5\n",
      "ActivityLinking_EnhancerNumber\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.634714\n",
      "         Iterations 5\n",
      "ActivityLinking_NumberConservedElements\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.637787\n",
      "         Iterations 5\n",
      "ProximityLinking_EnhancerNumber\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.625156\n",
      "         Iterations 5\n",
      "ProximityLinking_NumberConservedElements\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.627481\n",
      "         Iterations 6\n",
      "Episcore\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.633633\n",
      "         Iterations 5\n",
      "pHaplo\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.633325\n",
      "         Iterations 5\n",
      "pTriplo\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.635907\n",
      "         Iterations 5\n",
      "pLI\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.631815\n",
      "         Iterations 5\n",
      "pNull\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.635021\n",
      "         Iterations 4\n",
      "pRec\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.635879\n",
      "         Iterations 4\n",
      "oe_lof_upper\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 0.612370\n",
      "         Iterations 5\n",
      "oe_mis_upper\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.631497\n",
      "         Iterations 5\n",
      "gerp_element_per_bp\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644716\n",
      "         Iterations 4\n",
      "tad_distance\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643607\n",
      "         Iterations 5\n",
      "phylop_mean\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.644970\n",
      "         Iterations 4\n",
      "approved_target\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.641278\n",
      "         Iterations 5\n",
      "decipher_gene\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.642596\n",
      "         Iterations 5\n",
      "omim_gene\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.643091\n",
      "         Iterations 4\n",
      "gene_length\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.581989\n",
      "         Iterations 7\n",
      "gene_count_1Mb\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.590542\n",
      "         Iterations 5\n",
      "gene_distance_min\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596310\n",
      "         Iterations 5\n",
      "tissue_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.594496\n",
      "         Iterations 5\n",
      "adipose_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597362\n",
      "         Iterations 5\n",
      "adrenal_gland_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597532\n",
      "         Iterations 5\n",
      "artery_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596832\n",
      "         Iterations 5\n",
      "bladder_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597613\n",
      "         Iterations 5\n",
      "brain_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.591942\n",
      "         Iterations 5\n",
      "breast_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596950\n",
      "         Iterations 5\n",
      "cervix_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.595750\n",
      "         Iterations 5\n",
      "colon_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.595912\n",
      "         Iterations 5\n",
      "esophagus_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.598021\n",
      "         Iterations 5\n",
      "fallopian_tube_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596067\n",
      "         Iterations 5\n",
      "heart_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596187\n",
      "         Iterations 5\n",
      "kidney_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.595611\n",
      "         Iterations 5\n",
      "liver_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596136\n",
      "         Iterations 5\n",
      "lung_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597345\n",
      "         Iterations 5\n",
      "minor_salivary_gland_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.598142\n",
      "         Iterations 5\n",
      "muscle_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596991\n",
      "         Iterations 5\n",
      "nerve_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596680\n",
      "         Iterations 5\n",
      "ovary_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596465\n",
      "         Iterations 5\n",
      "pancreas_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597059\n",
      "         Iterations 5\n",
      "pituitary_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.593900\n",
      "         Iterations 5\n",
      "prostate_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596834\n",
      "         Iterations 5\n",
      "skin_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597521\n",
      "         Iterations 5\n",
      "small_intestine_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597897\n",
      "         Iterations 5\n",
      "spleen_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597842\n",
      "         Iterations 5\n",
      "stomach_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597499\n",
      "         Iterations 5\n",
      "testis_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.595725\n",
      "         Iterations 5\n",
      "thyroid_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596130\n",
      "         Iterations 5\n",
      "uterus_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596136\n",
      "         Iterations 5\n",
      "vagina_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597925\n",
      "         Iterations 5\n",
      "active_gene_distance\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597864\n",
      "         Iterations 5\n",
      "active_gene_count_1Mb\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.594663\n",
      "         Iterations 5\n",
      "gene_fraction_TssA\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.598042\n",
      "         Iterations 6\n",
      "gene_fraction_TssAFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597366\n",
      "         Iterations 7\n",
      "gene_fraction_TxFlnk\n",
      "Warning: Maximum number of iterations has been exceeded.\n",
      "         Current function value: 0.597925\n",
      "         Iterations: 35\n",
      "gene_fraction_Tx\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.598136\n",
      "         Iterations 5\n",
      "gene_fraction_TxWk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597611\n",
      "         Iterations 5\n",
      "gene_fraction_EnhG\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/nfs/users/nfs_t/tv5/.conda/envs/tv5_base/lib/python3.7/site-packages/statsmodels/base/model.py:606: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  ConvergenceWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596939\n",
      "         Iterations 11\n",
      "gene_fraction_Enh\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596662\n",
      "         Iterations 5\n",
      "gene_fraction_ZNFRpts\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.598122\n",
      "         Iterations 5\n",
      "gene_fraction_Het\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597780\n",
      "         Iterations 5\n",
      "gene_fraction_TssBiv\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597905\n",
      "         Iterations 5\n",
      "gene_fraction_BivFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597999\n",
      "         Iterations 5\n",
      "gene_fraction_EnhBiv\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.598074\n",
      "         Iterations 5\n",
      "gene_fraction_ReprPC\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.598049\n",
      "         Iterations 5\n",
      "gene_fraction_ReprPCWk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596240\n",
      "         Iterations 5\n",
      "gene_fraction_Quies\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596129\n",
      "         Iterations 5\n",
      "fraction_A\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.593532\n",
      "         Iterations 5\n",
      "fraction_B\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.594127\n",
      "         Iterations 5\n",
      "fraction_unassigned\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597695\n",
      "         Iterations 5\n",
      "gnomad_autosomal_dominant\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.598070\n",
      "         Iterations 5\n",
      "gnomad_haploinsufficient\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597714\n",
      "         Iterations 5\n",
      "gnomad_autosomal_recessive\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.596186\n",
      "         Iterations 5\n",
      "gnomad_olfactory_genes\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.598044\n",
      "         Iterations 5\n",
      "oncogene\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597211\n",
      "         Iterations 6\n",
      "EDS\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.585475\n",
      "         Iterations 5\n",
      "ActivityLinking_Conserved_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.590200\n",
      "         Iterations 5\n",
      "ActivityLinking_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.593959\n",
      "         Iterations 5\n",
      "ProximityLinking_Conserved_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.582895\n",
      "         Iterations 6\n",
      "ProximityLinking_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.585209\n",
      "         Iterations 5\n",
      "ActivityLinking_EnhancerNumber\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.588734\n",
      "         Iterations 5\n",
      "ActivityLinking_NumberConservedElements\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.591367\n",
      "         Iterations 5\n",
      "ProximityLinking_EnhancerNumber\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.582690\n",
      "         Iterations 6\n",
      "ProximityLinking_NumberConservedElements\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.582539\n",
      "         Iterations 6\n",
      "Episcore\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.587049\n",
      "         Iterations 5\n",
      "pHaplo\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.585303\n",
      "         Iterations 5\n",
      "pTriplo\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.587614\n",
      "         Iterations 5\n",
      "pLI\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.583359\n",
      "         Iterations 5\n",
      "pNull\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.586263\n",
      "         Iterations 5\n",
      "pRec\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.587470\n",
      "         Iterations 5\n",
      "oe_lof_upper\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.561135\n",
      "         Iterations 5\n",
      "oe_mis_upper\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.582324\n",
      "         Iterations 5\n",
      "gerp_element_per_bp\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597085\n",
      "         Iterations 5\n",
      "tad_distance\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.597912\n",
      "         Iterations 5\n",
      "phylop_mean\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.598032\n",
      "         Iterations 5\n",
      "approved_target\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.594681\n",
      "         Iterations 5\n",
      "decipher_gene\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.595757\n",
      "         Iterations 5\n",
      "omim_gene\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.595706\n",
      "         Iterations 5\n",
      "gene_length\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.473284\n",
      "         Iterations 7\n",
      "gene_count_1Mb\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.473981\n",
      "         Iterations 6\n",
      "gene_distance_min\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.478993\n",
      "         Iterations 5\n",
      "tissue_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.477576\n",
      "         Iterations 6\n",
      "adipose_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480099\n",
      "         Iterations 5\n",
      "adrenal_gland_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480499\n",
      "         Iterations 5\n",
      "artery_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.479612\n",
      "         Iterations 6\n",
      "bladder_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480045\n",
      "         Iterations 5\n",
      "brain_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.474493\n",
      "         Iterations 6\n",
      "breast_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.479771\n",
      "         Iterations 5\n",
      "cervix_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.478434\n",
      "         Iterations 6\n",
      "colon_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.478419\n",
      "         Iterations 6\n",
      "esophagus_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480690\n",
      "         Iterations 5\n",
      "fallopian_tube_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.479132\n",
      "         Iterations 6\n",
      "heart_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.479208\n",
      "         Iterations 6\n",
      "kidney_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.478914\n",
      "         Iterations 6\n",
      "liver_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.478760\n",
      "         Iterations 6\n",
      "lung_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.479145\n",
      "         Iterations 6\n",
      "minor_salivary_gland_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480751\n",
      "         Iterations 5\n",
      "muscle_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.479370\n",
      "         Iterations 6\n",
      "nerve_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.478053\n",
      "         Iterations 6\n",
      "ovary_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.479329\n",
      "         Iterations 6\n",
      "pancreas_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480060\n",
      "         Iterations 5\n",
      "pituitary_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.476889\n",
      "         Iterations 6\n",
      "prostate_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.479673\n",
      "         Iterations 5\n",
      "skin_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480006\n",
      "         Iterations 5\n",
      "small_intestine_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480546\n",
      "         Iterations 5\n",
      "spleen_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480203\n",
      "         Iterations 5\n",
      "stomach_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480060\n",
      "         Iterations 5\n",
      "testis_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.477318\n",
      "         Iterations 5\n",
      "thyroid_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.478890\n",
      "         Iterations 6\n",
      "uterus_expression\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.478740\n",
      "         Iterations 6\n",
      "vagina_expression\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480662\n",
      "         Iterations 5\n",
      "active_gene_distance\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480728\n",
      "         Iterations 5\n",
      "active_gene_count_1Mb\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.479112\n",
      "         Iterations 5\n",
      "gene_fraction_TssA\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.479182\n",
      "         Iterations 9\n",
      "gene_fraction_TssAFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.478632\n",
      "         Iterations 10\n",
      "gene_fraction_TxFlnk\n",
      "Warning: Maximum number of iterations has been exceeded.\n",
      "         Current function value: 0.480661\n",
      "         Iterations: 35\n",
      "gene_fraction_Tx\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480540\n",
      "         Iterations 6\n",
      "gene_fraction_TxWk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480384\n",
      "         Iterations 6\n",
      "gene_fraction_EnhG\n",
      "Warning: Maximum number of iterations has been exceeded.\n",
      "         Current function value: 0.479657\n",
      "         Iterations: 35\n",
      "gene_fraction_Enh\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480730\n",
      "         Iterations 5\n",
      "gene_fraction_ZNFRpts\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480753\n",
      "         Iterations 7\n",
      "gene_fraction_Het\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480613\n",
      "         Iterations 6\n",
      "gene_fraction_TssBiv\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480717\n",
      "         Iterations 5\n",
      "gene_fraction_BivFlnk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480772\n",
      "         Iterations 5\n",
      "gene_fraction_EnhBiv\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480792\n",
      "         Iterations 5\n",
      "gene_fraction_ReprPC\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480693\n",
      "         Iterations 5\n",
      "gene_fraction_ReprPCWk\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.477494\n",
      "         Iterations 5\n",
      "gene_fraction_Quies\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.479034\n",
      "         Iterations 5\n",
      "fraction_A\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.478953\n",
      "         Iterations 5\n",
      "fraction_B\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.479289\n",
      "         Iterations 5\n",
      "fraction_unassigned\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480410\n",
      "         Iterations 6\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/nfs/users/nfs_t/tv5/.conda/envs/tv5_base/lib/python3.7/site-packages/statsmodels/base/model.py:606: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  ConvergenceWarning)\n",
      "/nfs/users/nfs_t/tv5/.conda/envs/tv5_base/lib/python3.7/site-packages/statsmodels/discrete/discrete_model.py:1819: RuntimeWarning: overflow encountered in exp\n",
      "  return 1/(1+np.exp(-X))\n",
      "/nfs/users/nfs_t/tv5/.conda/envs/tv5_base/lib/python3.7/site-packages/statsmodels/base/model.py:606: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  ConvergenceWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "gnomad_autosomal_dominant\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480638\n",
      "         Iterations 5\n",
      "gnomad_haploinsufficient\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480675\n",
      "         Iterations 6\n",
      "gnomad_autosomal_recessive\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.479909\n",
      "         Iterations 6\n",
      "gnomad_olfactory_genes\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480487\n",
      "         Iterations 5\n",
      "oncogene\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480548\n",
      "         Iterations 6\n",
      "EDS\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.472641\n",
      "         Iterations 6\n",
      "ActivityLinking_Conserved_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.475606\n",
      "         Iterations 6\n",
      "ActivityLinking_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.477730\n",
      "         Iterations 5\n",
      "ProximityLinking_Conserved_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.472984\n",
      "         Iterations 6\n",
      "ProximityLinking_nt_count\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.473842\n",
      "         Iterations 6\n",
      "ActivityLinking_EnhancerNumber\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.474380\n",
      "         Iterations 6\n",
      "ActivityLinking_NumberConservedElements\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.476384\n",
      "         Iterations 6\n",
      "ProximityLinking_EnhancerNumber\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.473245\n",
      "         Iterations 6\n",
      "ProximityLinking_NumberConservedElements\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.472515\n",
      "         Iterations 6\n",
      "Episcore\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.472744\n",
      "         Iterations 6\n",
      "pHaplo\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.466855\n",
      "         Iterations 6\n",
      "pTriplo\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.469417\n",
      "         Iterations 5\n",
      "pLI\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.469075\n",
      "         Iterations 6\n",
      "pNull\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.470767\n",
      "         Iterations 5\n",
      "pRec\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.471090\n",
      "         Iterations 5\n",
      "oe_lof_upper\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.454855\n",
      "         Iterations 6\n",
      "oe_mis_upper\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.469023\n",
      "         Iterations 6\n",
      "gerp_element_per_bp\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.478912\n",
      "         Iterations 6\n",
      "tad_distance\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480763\n",
      "         Iterations 5\n",
      "phylop_mean\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480735\n",
      "         Iterations 5\n",
      "approved_target\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.479999\n",
      "         Iterations 6\n",
      "decipher_gene\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.480066\n",
      "         Iterations 6\n",
      "omim_gene\n",
      "Optimization terminated successfully.\n",
      "         Current function value: 0.479862\n",
      "         Iterations 5\n"
     ]
    }
   ],
   "source": [
    "protein_enrich_logr_results, count = {}, 0\n",
    "for z_score in [2, 10, 20, 30]:\n",
    "    gene_misexp_gene_path = wkdir_path.joinpath(f\"2_misexp_qc/misexp_metrics/misexp_genes_tpm0.5_z{z_score}.txt\")\n",
    "    gene_misexp = set(pd.read_csv(gene_misexp_gene_path, sep=\"\\t\", header=None)[0].tolist())\n",
    "    gene_never_misexp_path = wkdir_path.joinpath(f\"2_misexp_qc/misexp_metrics/never_misexp_genes_tpm0.5_z{z_score}.txt\")\n",
    "    gene_never_misexp = set(pd.read_csv(gene_never_misexp_path, sep=\"\\t\", header=None)[0].tolist())\n",
    "    # categorical variable: misexpressed, never misexpressed \n",
    "    gene_protein_features_df[\"misexp_group\"] = np.where(gene_protein_features_df.gene_id.isin(gene_never_misexp), 0, 1)\n",
    "    # logistic regression \n",
    "    for feature in protein_features:\n",
    "        print(feature)\n",
    "        input_df = gene_protein_features_df.copy()\n",
    "        input_df[f\"{feature}_norm\"] = (input_df[feature] - input_df[feature].mean())/input_df[feature].std()\n",
    "        y, X = dmatrices(f\"misexp_group ~ {feature}_norm\", input_df, return_type = 'dataframe')\n",
    "        logit_fit = discrete_model.Logit(endog=y, exog=X).fit()\n",
    "        log_odds, pval = logit_fit.params[1], logit_fit.pvalues[1]\n",
    "        # normal approximation confidence intervals\n",
    "        lower_conf = logit_fit.conf_int(alpha=0.05)[0][1]\n",
    "        upper_conf = logit_fit.conf_int(alpha=0.05)[1][1]\n",
    "        protein_enrich_logr_results[count] = [z_score, feature, log_odds, lower_conf, upper_conf, pval]\n",
    "        count += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "c946cdf4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# results \n",
    "protein_enrich_logr_results_df = pd.DataFrame.from_dict(protein_enrich_logr_results, \n",
    "                                                orient=\"index\", \n",
    "                                                columns=[\"z_score\", \"feature\", \"log_odds\", \"lower\", \"upper\", \"pval\"])\n",
    "# multiple testing correction \n",
    "pval_as_array = protein_enrich_logr_results_df.pval.to_numpy()\n",
    "# FDR BH and Bonferroni correction \n",
    "for method in [\"fdr_bh\", \"bonferroni\"]:\n",
    "    pass_test, pval_adj, _, _ = multitest.multipletests(pval_as_array, alpha=0.05, method=method)\n",
    "    protein_enrich_logr_results_df[f\"{method}_pass\"] = pass_test\n",
    "    protein_enrich_logr_results_df[f\"{method}_pval_adj\"] = pval_adj\n",
    "protein_enrich_logr_results_df[\"feature_name\"] = protein_enrich_logr_results_df.feature.replace(feature_name_dict)\n",
    "# add feature group \n",
    "features_to_group_dict = {}\n",
    "for group in feature_groups_dict.keys(): \n",
    "    for feature in feature_groups_dict[group]: \n",
    "        features_to_group_dict[feature] = group \n",
    "protein_enrich_logr_results_df[\"feature_group\"] = protein_enrich_logr_results_df.feature.replace(features_to_group_dict)\n",
    "# calculate absolute value of log-odds \n",
    "protein_enrich_logr_results_df[\"log_odds_abs\"] = protein_enrich_logr_results_df.log_odds.abs()\n",
    "# z-score > 2, passing Bonferroni correction  \n",
    "protein_enrich_logr_results_pass_z2_df = protein_enrich_logr_results_df[(protein_enrich_logr_results_df.bonferroni_pass) & \n",
    "                                                                        (protein_enrich_logr_results_df[\"z_score\"] == 2)\n",
    "                                                                       ].sort_values(by=\"log_odds_abs\", ascending=False)\n",
    "# top 15 features\n",
    "protein_enrich_logr_top_results_pass_z2_df = protein_enrich_logr_results_pass_z2_df.head(top_results).copy()\n",
    "protein_top_features = set(protein_enrich_logr_top_results_pass_z2_df.feature.unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "36b5740b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# concatenate all results\n",
    "protein_enrich_logr_results_df[\"gene_type\"] = \"protein-coding\"\n",
    "lncrna_enrich_logr_results_df[\"gene_type\"] = \"lncRNA\"\n",
    "enrich_logr_results_df[\"gene_type\"] = \"all\"\n",
    "\n",
    "all_enrich_test_results_df = pd.concat([enrich_logr_results_df.drop(columns=[\"log_odds_adj\"]), protein_enrich_logr_results_df, lncrna_enrich_logr_results_df])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "c0888c93",
   "metadata": {},
   "outputs": [],
   "source": [
    "# select top 15 features from each group \n",
    "combined_top15_features = protein_top_features.union(lncrna_top_features).union(all_top_features)\n",
    "all_enrich_test_results_top15_z2_df = all_enrich_test_results_df[all_enrich_test_results_df.feature.isin(combined_top15_features) &\n",
    "                                                        (all_enrich_test_results_df[\"z_score\"] == 2)].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "06b868de",
   "metadata": {},
   "outputs": [],
   "source": [
    "feature_names_in_plot = {'Gene length':'Gene length',\n",
    "                         'gnomAD LOEUF':'gnomAD LOEUF',\n",
    "                         'OMIM':'OMIM',\n",
    "                         '# enhancers (activity-linking)': '# enhancers (activity)',\n",
    "                         '# enhancers (proximity-linking)': '# enhancers (proximity)',\n",
    "                         'Conserved enhancer bp (proximity-linking)': 'Conserved enhancer bp (proximity)',\n",
    "                         'Conserved # enhancers (proximity-linking)': 'Conserved # enhancers (proximity)',\n",
    "                         'Enhancer bp (proximity-linking)': 'Enhancer bp (proximity)',\n",
    "                         'Brain': 'Brain expression',\n",
    "                         '# tissues expressed': '# tissues expressed',\n",
    "                         'Enhancer Domain Score (EDS)': 'Enhancer Domain Score', \n",
    "                         'Episcore': 'Episcore',\n",
    "                         \"gnomAD missense OEUF\": \"gnomAD missense OEUF\", \n",
    "                         'Pituitary': 'Pituitary expression', \n",
    "                         'Heart': 'Heart expression', \n",
    "                         \"Protein-coding\": \"Protein-coding\"\n",
    "                        }\n",
    "\n",
    "\n",
    "# write results adjusting name for plot \n",
    "all_enrich_test_results_top15_z2_df[\"feature_name_trunc\"] = all_enrich_test_results_top15_z2_df.feature_name.replace(feature_names_in_plot)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "06a2d7c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# write to file \n",
    "all_enrich_test_results_top15_z2_path = out_dir_path.joinpath(f\"enrich_logr_results_gene_types_z2_bonf.csv\")\n",
    "all_enrich_test_results_top15_z2_df.to_csv(all_enrich_test_results_top15_z2_path, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f977fb5",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
